/*LIS Cross-section Data center in Luxembourg*/

/*email: usersupport@lisdatacenter.org*/

/*LIS Self Teaching Package 2022*/
/*Part II: Gender, employment, and wages*/
/*SAS version*/

/*last change of this version of the syntax: 15-01-2022*/


/*Exercise 5: Hourly wages, education, and country-specific variables*/

/* Session setup: quiet log, tolerant format handling, and plain wide listing output. */
options
	nonotes nosource        /* no NOTE lines and no source echo in the log          */
	nofmterr                /* a format that cannot be found is not an error        */
	nodate nonumber         /* no date stamp and no page numbers in the output      */
	nocenter                /* left-align procedure output                          */
	label                   /* procedures may display variable labels               */
	ls=max ps=max           /* maximum line size and page size                      */
;
/* Replace any existing first title line with an empty one. */
title "";
/*
 * educ macro -- Exercise 5: hourly wages by education, per country.
 *
 * Input (global macro variables, not parameters):
 *   all       blank-separated list of dataset short names (e.g. us04 be04 gr04).
 *   <name>h   for every name in the list, a macro variable resolving to the
 *   <name>p   household-level / person-level dataset reference; these are read
 *             through the triple-ampersand references below and must be defined
 *             by the calling environment.
 *
 * For every name in the list the macro:
 *   1. reads and sorts the household and person files and merges them by hid,
 *      keeping persons aged 25-54 with relation <= 2200;
 *   2. derives homeowner, achildcat and depemp;
 *   3. prints a weighted dname*educ*educ_c cross-tabulation for that dataset;
 *   4. builds hourwage from hwage1 (missing dropped, negatives set to 0) and
 *      top/bottom-codes it at exp(quartile -/+ 3 * IQR) of the log wage;
 *   5. stacks the result into WORK.CURRENT.
 * Finally it prints weighted median hourly wages by dname*sex*educ.
 *
 * Output: listing output plus WORK datasets <name>h, <name>p, <name>, temp, current.
 */
%MACRO educ ;
	/* Keep all working macro variables local so same-named globals are untouched. */
	%LOCAL i n ccyy b t ;

	/* Guard: without a usable list there is nothing to loop over. */
	%IF NOT %SYMEXIST(all) %THEN
		%DO ;
			%PUT ERROR: educ macro - macro variable ALL is not defined. ;
			%RETURN ;
		%END ;
	%IF %LENGTH(&all) = 0 %THEN
		%DO ;
			%PUT ERROR: educ macro - macro variable ALL is empty. ;
			%RETURN ;
		%END ;

	/* Number of blank-separated dataset names; replaces the former fixed bound of 3. */
	%LET n = %SYSFUNC(COUNTW(&all, %STR( ))) ;

	%DO i = 1 %TO &n ;
		%LET ccyy = %SCAN(&all, &i, %STR( )) ;

		/* Household file: the triple ampersand resolves the macro variable named <ccyy>h. */
		DATA &ccyy.h ;
		 SET &&&ccyy.h (KEEP=hid did own) ;
		RUN ;

		PROC SORT DATA=&ccyy.h ;
		  BY hid ;
		RUN ;

		/* Person file: resolved the same way through the macro variable named <ccyy>p. */
		DATA &ccyy.p ;
		 SET &&&ccyy.p (KEEP=hid did dname pwgt ppopwgt relation partner ageyoch age sex immigr educ educ_c emp status1 ptime1 hwage1) ;
		RUN ;

		PROC SORT DATA=&ccyy.p ;
		  BY hid ;
		RUN ;

		/* Attach household variables to each person and derive the analysis variables. */
		DATA &ccyy ;
		 MERGE &ccyy.h &ccyy.p ;
			BY hid ;
				/* Sample restriction (subsetting IF): age 25-54 and relation <= 2200.        */
				/* NOTE(review): a missing relation also satisfies LE 2200 in SAS, because   */
				/* missing sorts below every number - confirm that this is intended.         */
				/* Presumably relation <= 2200 selects head and spouse/partner - confirm     */
				/* against the LIS codebook.                                                 */
				IF ((25 <= age <= 54) AND (relation LE 2200)) ;

				/* homeowner stays missing when own falls outside both ranges. */
				IF (100 <= own <= 199) THEN homeowner = 1;
				IF (200 <= own <= 299) THEN homeowner = 0;

				/* Category from ageyoch: 1 = under 6, 2 = 6-17, 0 = anything else, */
				/* which includes a missing ageyoch.                                */
				IF (0 <= ageyoch < 6) THEN achildcat  = 1 ;
				ELSE IF (6 <= ageyoch < 18) THEN achildcat  = 2;
				ELSE  achildcat  = 0 ;

				/* depemp: 1 for status1 in 100-120, 0 for 200-240, otherwise missing. */
				IF (100 <= status1 <= 120) THEN depemp = 1 ;
				ELSE IF (200 <= status1 <= 240) THEN depemp = 0 ;
				ELSE depemp = . ;
		RUN ;

		PROC SORT DATA=&ccyy ;
		  BY dname sex ;
		RUN ;

		/* Compare the recoded and the country-specific education variables.        */
		/* This reads the dataset of the current iteration: WORK.CURRENT does not    */
		/* exist yet on the first pass and never holds the current dataset here.     */
		PROC FREQ DATA=&ccyy ;
		  TABLES dname*educ*educ_c / NOFREQ NOCOL NOCUM MISSING ;
		  WEIGHT ppopwgt   ;
		RUN ;

		/* Hourly wage: drop missing values, floor negatives at 0, take logs. */
		DATA &ccyy ;
		 SET &ccyy ;
				hourwage = hwage1 ;
				IF hourwage=. THEN DELETE;
				IF (hourwage < 0 ) THEN hourwage = 0 ;
				/* A zero wage gets log wage 0 by convention; this avoids calling */
				/* LOG(0) and then overwriting the resulting missing value.       */
				IF (hourwage > 0) THEN hourwagelog = LOG(hourwage) ;
				ELSE hourwagelog = 0 ;
		RUN ;

		PROC SORT DATA=&ccyy ;
		  BY did hourwagelog;
		RUN ;

		/* Weighted quartiles of the log wage for this dataset. */
		PROC UNIVARIATE DATA=&ccyy NOPRINT ;
		  VAR hourwagelog ;
			WEIGHT ppopwgt ;
				OUTPUT OUT= temp P25=q25   P75=q75;
		RUN ;

		/* Hand the quartiles to the macro layer. SYMPUTX strips blanks and keeps */
		/* more digits than the 12-character conversion done by SYMPUT.           */
		DATA _NULL_;
		  SET temp;
			CALL SYMPUTX("b", q25);
			CALL SYMPUTX("t", q75);
		RUN;

		/* Top- and bottom-code hourwage at exp(Q3 + 3*IQR) and exp(Q1 - 3*IQR).   */
		/* The substituted values are parenthesised so that negative quartiles    */
		/* cannot produce adjacent minus signs.                                    */
		DATA &ccyy ;
		 SET &ccyy ;
			iqr=(&t)-(&b);
			upper_bound=(&t) + (iqr * 3);
			lower_bound=(&b) - (iqr * 3);
			IF hourwage>exp(upper_bound) THEN hourwage=exp(upper_bound);
			IF hourwage<exp(lower_bound) THEN hourwage=exp(lower_bound);
		RUN ;

		/* Stack: the first dataset creates CURRENT (and attaches the chcat format), */
		/* every later dataset is appended to it.                                     */
		%IF &i = 1 %THEN
			%DO ;
				DATA current ;
				 SET &ccyy ;
					ATTRIB achildcat   FORMAT=chcat. ;
				RUN ;
			%END ;
		%ELSE
			%DO ;
				PROC APPEND BASE=current DATA=&ccyy FORCE ;
				RUN ;
			%END ;
	%END ;

	/* Weighted median hourly wage for every dname*sex*educ cell of the stacked data. */
	PROC MEANS DATA=current MEDIAN;
	  CLASS   dname sex educ ;
	  TYPES   dname*sex*educ ;
	  VAR     hourwage ;
	  WEIGHT  ppopwgt  ;
	RUN ;

%MEND educ ;


/* Display labels for the achildcat categories (format name: chcat). */
proc format ;
	value chcat
		. = 'missing'
		0 = 'no children <18'
		1 = '<6 years'
		2 = '6-17 years' ;
run ;

/* Blank-separated dataset short names; the educ macro reads this global list. */
%let all = us04 be04 gr04 ;

/* Run the exercise. */
%educ
